# Setup file locations to import
beer_csv_loc <- "./StartingDBs/Beers.csv"
breweries_csv_loc <- "./StartingDBs/Breweries.csv"
# Read in data
beer_data <- read.csv(beer_csv_loc, header = TRUE)
brewery_data <- read.csv(breweries_csv_loc, header = TRUE)
# Create a seperate data frame to store state data
### Note: This data set counts the district of columbia as a state
num_of_breweries_by_state <- data.frame(table(brewery_data$State))
#the last six observations to check the merged file
# Change column name of beer_data to match brewery_data to use as a primary key
colnames(beer_data)[5] <- "Brew_ID"
# Merge two data bases using merge
full_brew_data <- merge(brewery_data, beer_data, by="Brew_ID")
# Rename columns 2 and 5 that were changed during the merge
colnames(full_brew_data)[2] <- "Brewery"
colnames(full_brew_data)[5] <- "Beer_Name"
# Print first 6 and last 6 observations
head(full_brew_data, n=6)
## Brew_ID Brewery City State Beer_Name Beer_ID ABV IBU
## 1 1 NorthGate Brewing Minneapolis MN Pumpion 2689 0.060 38
## 2 1 NorthGate Brewing Minneapolis MN Stronghold 2688 0.060 25
## 3 1 NorthGate Brewing Minneapolis MN Parapet ESB 2687 0.056 47
## 4 1 NorthGate Brewing Minneapolis MN Get Together 2692 0.045 50
## 5 1 NorthGate Brewing Minneapolis MN Maggie's Leap 2691 0.049 26
## 6 1 NorthGate Brewing Minneapolis MN Wall's End 2690 0.048 19
## Style Ounces
## 1 Pumpkin Ale 16
## 2 American Porter 16
## 3 Extra Special / Strong Bitter (ESB) 16
## 4 American IPA 16
## 5 Milk / Sweet Stout 16
## 6 English Brown Ale 16
tail(full_brew_data, n=6)
## Brew_ID Brewery City State
## 2405 556 Ukiah Brewing Company Ukiah CA
## 2406 557 Butternuts Beer and Ale Garrattsville NY
## 2407 557 Butternuts Beer and Ale Garrattsville NY
## 2408 557 Butternuts Beer and Ale Garrattsville NY
## 2409 557 Butternuts Beer and Ale Garrattsville NY
## 2410 558 Sleeping Lady Brewing Company Anchorage AK
## Beer_Name Beer_ID ABV IBU Style Ounces
## 2405 Pilsner Ukiah 98 0.055 NA German Pilsener 12
## 2406 Porkslap Pale Ale 49 0.043 NA American Pale Ale (APA) 12
## 2407 Snapperhead IPA 51 0.068 NA American IPA 12
## 2408 Moo Thunder Stout 50 0.049 NA Milk / Sweet Stout 12
## 2409 Heinnieweisse Weissebier 52 0.049 NA Hefeweizen 12
## 2410 Urban Wilderness Pale Ale 30 0.049 NA English Pale Ale 12
# Create CSV Files for head and tail
head_output_csv <- "./output/head_beer_data.csv"
tail_output_csv <- "./output/tail_been_data.csv"
write.csv(head(full_brew_data, n=6), head_output_csv)
write.csv(tail(full_brew_data, n=6), tail_output_csv)
# Create two new tables where one has only ABV with no <NA>'s and the other
state_abv <- data.frame(State=full_brew_data$State, ABV=full_brew_data$ABV)
state_abv <- state_abv %>% drop_na(ABV)
# Do the same for IBU
state_ibu <- data.frame(State=full_brew_data$State, IBU=full_brew_data$IBU)
state_ibu <- state_ibu %>% drop_na(IBU)
##state. Plot a bar chart
# Find the average ABV for the state_abv table
avg_state_abv <- ddply(state_abv, .(State), function(x) mean(x$ABV))
colnames(avg_state_abv)[2] <- "Average_ABV" # Rename the column
# Drop DC. It's not a state....yet
avg_state_abv <- avg_state_abv[-c(8),] # DC is equal to position 8 on the table
# Order the states by Decending ABV content
avg_state_abv <- avg_state_abv[order(avg_state_abv$Average_ABV, decreasing=TRUE),]
# Find Average IBU
avg_state_ibu <- ddply(state_ibu, .(State), function(x) mean(x$IBU))
colnames(avg_state_ibu)[2] <- "Average_IBU" # Rename column
# Order the states by IBU Decending
avg_state_ibu <- avg_state_ibu[order(avg_state_ibu$Average_IBU, decreasing=TRUE),]
# Plot Average Alcohol by Volume per state
ggplot(avg_state_abv, aes(x=reorder(State, desc(Average_ABV)), y=Average_ABV, fill=State)) +
geom_col(show.legend = FALSE, width=.9, position="dodge") +
ggtitle("Average ABV per State") +
xlab("State") +
ylab("Average Alcohol by Volume")
# Plot Average International Bitterness Unit per State
ggplot(avg_state_ibu, aes(x=reorder(State, desc(Average_IBU)), y=Average_IBU, fill=State)) +
geom_col(show.legend = FALSE, width=.9, position="dodge") +
ggtitle("Average IBU per State") +
xlab("State") +
ylab("Average IBU")
# Get the State with the highest average ABV
state_max_abv <- avg_state_abv[which.max(avg_state_abv$Average_ABV),] # NV
# Get the State with the highest average IBU
state_max_ibu <- avg_state_ibu[which.max(avg_state_ibu$Average_IBU),] # WV
#and other types of Ale. Use KNN.
6. Comment on the Summary stats and distribution of the ABV variable
##7. Is there an apparent relationship between the bitterness of the beer and its alcoholic content?